Highlighting elements

Elizabeth King
Kevin Middleton

Deception with color

  • Applying a sequential color scheme to an unordered variable
  • Using a diverging color scale when there is no natural midpoint to the data
  • Binning a continuous variable into a discrete scale

Deception 1

Applying a sequential color scheme to an unordered variable

# Deliberately misleading example: `species` is an unordered categorical
# variable, yet type = "seq" asks Brewer for a sequential palette.
ggplot(palmerpenguins::penguins,
       aes(x = body_mass_g, y = flipper_length_mm, color = species)) +
  geom_point(size = 3) +
  labs(x = "Body Mass (g)", y = "Flipper Length (mm)") +
  scale_color_brewer(name = NULL, type = "seq")

Deception 1

Deception 2

Using a diverging color scale when there is no natural midpoint to the data

# Parse the CST column with lubridate::ymd() so it can be used as a date axis.
LW <- mutate(ggridges::lincoln_weather, CST = lubridate::ymd(CST))

# Deliberately misleading example: the diverging fill is centered on the mean
# maximum temperature, which is not a natural midpoint of the data.
ggplot(LW, aes(x = CST,
               y = `Max Temperature [F]`,
               fill = `Max Temperature [F]`)) +
  geom_col() +
  labs(x = "Date", y = "Maximum Temperature (F)") +
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "blue",
    # Same values as the source data set: only CST was modified above.
    midpoint = mean(LW$`Max Temperature [F]`)
  )

Deception 2

Deception 3

Binning a continuous variable into a discrete scale

# Deliberately misleading example: maximum temperature is floored to the
# nearest multiple of 20 and the resulting bins are stored as a factor.
LW <- mutate(LW, Max_Temp = factor(`Max Temperature [F]` %/% 20 * 20))

# Jitter uses a fixed seed so the point positions are reproducible.
ggplot(LW, aes(x = `Mean Wind Speed[MPH]`,
               y = `Max Humidity`,
               color = Max_Temp)) +
  geom_point(
    shape = 16,
    size = 5,
    alpha = 0.5,
    position = position_jitter(width = 0.2, height = 0.2, seed = 3437)
  ) +
  labs(x = "Mean Wind Speed (MPH)") +
  scale_y_continuous(name = "Maximum Humidity (%)", limits = c(50, 103)) +
  scale_color_brewer(name = "Temperature", type = "qual", palette = "Set1")

Deception 3

ggplot gradients for color and fill

  • scale_color_gradient: Low to high
  • scale_color_gradient2: Low to mid to high
  • scale_color_gradientn: any custom values

A lot of fine control over how the space is divided among colors

  • With power comes responsibility

Temperature on the Celsius scale

# Convert mean temperature from degrees Fahrenheit to degrees Celsius.
LW <- mutate(LW, Mean_Temp_C = 5 / 9 * (`Mean Temperature [F]` - 32))

# The diverging fill is centered on 0 (midpoint = 0), and a horizontal
# reference line is drawn at the same value.
ggplot(LW, aes(x = CST, y = Mean_Temp_C, fill = Mean_Temp_C)) +
  geom_hline(yintercept = 0) +
  geom_col() +
  labs(x = "Date", y = "Mean Temperature (C)") +
  scale_fill_gradient2(
    name = NULL,
    low = "blue",
    mid = "purple",
    high = "red",
    midpoint = 0
  )

Temperature on the Celsius scale

Tree trunks data

DBH (diameter at breast height) and trunk flare for 4 species of maple in 6 cities in Minnesota¹

# A tibble: 275 × 4
   Species          City        DBH    TF
   <chr>            <chr>     <int> <dbl>
 1 Acer platanoides Rochester    23   8.7
 2 Acer platanoides Rochester    21   6.5
 3 Acer platanoides Rochester    21   6.8
 4 Acer platanoides Rochester    16   6.7
 5 Acer platanoides Rochester    16   9  
 6 Acer platanoides Rochester    28  12.7
 7 Acer platanoides Rochester    16   7.5
 8 Acer platanoides Rochester    14   6.8
 9 Acer platanoides Rochester    21   7.9
10 Acer platanoides Rochester    16   5.2
# … with 265 more rows

Visualizing tree trunks data

  • geom_violin()
  • ggforce::geom_sina()¹
# Violin per species with the individual observations overlaid as a sina
# layer; the seed is fixed so the sina layout is reproducible.
ggplot(Trunks, aes(x = Species, y = DBH, color = Species)) +
  geom_violin() +
  geom_sina(seed = 74645)

Visualizing tree trunks data

Map specific colors to factors

  • By default scale_color_manual() assigns colors in factor order
  • Use breaks to specify the order you prefer
  • Hex codes okay: coolors.co
# Colors in `values` are paired with the species listed in `breaks`, in order.
# NOTE(review): "Acer saccarum" looks like a misspelling of "Acer saccharum";
# it must match the spelling used in Trunks$Species exactly -- confirm.
ggplot(Trunks, aes(x = Species, y = DBH, color = Species)) +
  geom_violin() +
  geom_sina(seed = 74645) +
  theme(axis.text.x = element_text(face = "italic")) +
  scale_color_manual(
    breaks = c("Acer saccharinum", "Acer rubrum",
               "Acer platanoides", "Acer saccarum"),
    values = c("red", "orange", "blue", "purple"),
    guide = NULL
  )

Map specific colors to factors

Create an explicit color map function

#' Color scale with one fixed color per maple species
#'
#' Builds a manual ggplot2 color scale whose named `values` tie each species
#' name to a specific color, independent of factor order. The scale is created
#' with `guide = NULL`, presumably to suppress the legend.
#'
#' @return The scale object created by `ggplot2::scale_color_manual()`, ready
#'   to be added to a plot with `+`.
tree_color_map <- function() {
  # NOTE(review): "Acer saccarum" looks like a misspelling of "Acer saccharum";
  # the name must match the spelling used in the plotted data -- confirm.
  species_colors <- c(
    "Acer saccharinum" = "red",
    "Acer rubrum" = "orange",
    "Acer platanoides" = "blue",
    "Acer saccarum" = "purple"
  )
  ggplot2::scale_color_manual(
    name = "Species",
    values = species_colors,
    guide = NULL
  )
}

# Same violin + sina plot, with species colors supplied by tree_color_map().
ggplot(Trunks, aes(x = Species, y = DBH, color = Species)) +
  geom_violin() +
  geom_sina(seed = 74645) +
  theme(axis.text.x = element_text(face = "italic")) +
  tree_color_map()

Create an explicit color map function

Labeling points with ggrepel

# Upstream vs. downstream counts with a dotted 1:1 reference line; every point
# is labelled with its tributary name, placed by ggrepel.
ggplot(EF, aes(x = n_Upstream, y = n_Downstream, label = Tributary)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dotted") +
  geom_point(size = 3, color = "tomato") +
  geom_text_repel(point.padding = 0.3) +
  labs(x = "Upstream", y = "Downstream") +
  coord_equal(xlim = c(0, 30), ylim = c(0, 30))

Labeling points with ggrepel

Arrows

# As before, but labels are pushed away from the points (box.padding) and
# connected to them with small arrows; the seed fixes the label placement.
ggplot(EF, aes(x = n_Upstream, y = n_Downstream, label = Tributary)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dotted") +
  geom_point(size = 3, color = "tomato") +
  geom_text_repel(
    seed = 457849,
    arrow = arrow(length = unit(0.015, "npc")),
    box.padding = 2,
    point.padding = 0.5
  ) +
  labs(x = "Upstream", y = "Downstream") +
  coord_equal(xlim = c(0, 30), ylim = c(0, 30))

Arrows

Label a subset of points

# Flag the two tributaries that should receive a text label.
EF <- mutate(
  EF,
  lab = if_else(Tributary %in% c("Manacapuru", "Madeira"), "in", "out")
)

ggplot() +
  geom_abline(intercept = 0, slope = 1, linetype = "dotted") +
  # All tributaries are drawn as points ...
  geom_point(
    data = EF,
    mapping = aes(x = n_Upstream, y = n_Downstream),
    size = 3,
    color = "tomato"
  ) +
  # ... but only the flagged rows are passed to the label layer.
  geom_text_repel(
    data = filter(EF, lab == "in"),
    mapping = aes(x = n_Upstream, y = n_Downstream, label = Tributary),
    seed = 3434,
    arrow = arrow(length = unit(0.015, "npc")),
    box.padding = 2,
    point.padding = 0.5
  ) +
  labs(x = "Upstream", y = "Downstream") +
  coord_equal(xlim = c(0, 30), ylim = c(0, 30))

Label a subset of points

Use labels instead of a legend

# Per-species label positions: the mean body mass and mean bill length of
# each species, computed after dropping rows with missing values.
PP_lab <- palmerpenguins::penguins |>
  drop_na() |>
  group_by(species) |>
  summarize(mean_x = mean(body_mass_g),
            mean_y = mean(bill_length_mm),
            .groups = "drop")

# Points and labels share the color scale, so the label placed at each
# species' mean takes the place of a legend.
ggplot() +
  geom_point(data = palmerpenguins::penguins |> drop_na(),
             aes(x = body_mass_g, y = bill_length_mm, color = species)) +
  geom_label(data = PP_lab,
             aes(x = mean_x, y = mean_y, label = species, color = species),
             size = 7) +
  # Palette is given as a plain "package::palette" string (the documented
  # form) rather than a backtick-quoted symbol.
  scale_color_paletteer_d("feathers::cassowary", guide = NULL)

Use labels instead of a legend

Marking sets of points: ggforce

geom_mark_{rect, circle, ellipse, hull}()

# Scatter plot with one labelled ellipse per species drawn by ggforce.
# Palette names are given as plain "package::palette" strings (the documented
# form) rather than backtick-quoted symbols; color and fill use the same
# palette.
ggplot(data = palmerpenguins::penguins |> drop_na(),
       aes(x = body_mass_g, y = bill_length_mm, color = species)) +
  geom_point() +
  geom_mark_ellipse(aes(fill = species, label = species)) +
  scale_color_paletteer_d("ggsci::default_jco", guide = NULL) +
  scale_fill_paletteer_d("ggsci::default_jco", guide = NULL)

Marking sets of points: ggforce

Marking sets of points: ggforce

geom_mark_{rect, circle, ellipse, hull}()

# Scatter plot with one labelled hull per species drawn by ggforce.
# Palette names are given as plain "package::palette" strings (the documented
# form) rather than backtick-quoted symbols; color and fill use the same
# palette.
ggplot(data = palmerpenguins::penguins |> drop_na(),
       aes(x = body_mass_g, y = bill_length_mm, color = species)) +
  geom_point() +
  geom_mark_hull(aes(fill = species, label = species)) +
  scale_color_paletteer_d("ggsci::default_jama", guide = NULL) +
  scale_fill_paletteer_d("ggsci::default_jama", guide = NULL)

Marking sets of points: ggforce